Model LUAD stage

Last modified 12:47 PM on Feb 22, 2016. This document, R session image, version history, knitr cache, figures, and other associated datasets are located in /inside/grotto/blin/trna-markers/luad/predict/.

library(plyr)
library(glmnet)
library(ROCR)
library(GenomicRanges)
library(stringr)
library(reshape2)
library(survival)
set.seed(12)
source('/inside/home/blin/lib/R/ggsurv.R')
load('/inside/home/blin/grotto/data/hg19-srnas.RData')
load('/inside/home/blin/grotto/trna-markers/process-reads/luad-counts.RData')
luad_clinical <- luad_clinical[match(colnames(luad_adjusted_counts), luad_clinical$barcode), ]
ntrials <- 200

Predict clinical covariates given counts

Generic function for classification of samples into discrete classes

source('predict.R') 

buildTestCompareGlms <- function(metadata, covariate, counts, ntrials) {
  # metadata: filtered metadata for covariate of interest
  # counts: filtered counts containing only sample with covariate of interest

  # set up feature sets
  load('/inside/home/blin/grotto/data/hg19-srnas.RData')
  tsrnas <- unique(srnas[srnas$class %in% c("fivehalf", "threehalf", "trailer")]$tx_name)
  mirnas <- unique(srnas[str_detect(srnas$class, "mi")]$tx_name)
  snornas <- unique(srnas[srnas$class == "snoRNA"]$tx_name)
  pirnas <- unique(srnas[srnas$class == "piRNA"]$tx_name)

  # create training/testing sets and feature sets
  system(paste0('echo Setting up training/testing sets'))
  datasets <- lapply(1:ntrials, function(i) setupTrainingTestingSets(metadata, covariate, counts))


  # build, test on scrambled data (control)
  system(paste0('echo Building/testing models - control'))
  control_tsrna_glms <- lapply(datasets, buildTestGlm, features = tsrnas, covariate = covariate, randomize = TRUE) # the same models are built twice, but the code is a lot cleaner this way
  control_mirna_glms <- lapply(datasets, buildTestGlm, features = mirnas, covariate = covariate, randomize = TRUE)
  control_feature_glms <- list(control_tsrna_glms, control_mirna_glms)
  control_roc <- ldply(mapply(parseGlms, glms = control_feature_glms, class = c("tsRNA, permuted", "miRNA, permuted"), SIMPLIFY = FALSE), identity)
  
  # build and predict testing data
  system(paste0('echo Building/testing models'))
  all_feature_glms <- lapply(list(tsrnas, mirnas, snornas, pirnas), function(features) {
    lapply(datasets, buildTestGlm, features = features, covariate = covariate, randomize = FALSE)
    })
  roc <- ldply(mapply(parseGlms, glms = all_feature_glms, class = c("tsRNA", "miRNA", "snoRNA", "piRNA"), SIMPLIFY = FALSE), identity)
  roc <- rbind(roc, control_roc)
  roc$Class <- factor(roc$Class, levels = c("tsRNA", "miRNA", "tsRNA, permuted", "snoRNA", "piRNA", "miRNA, permuted"))

  # create plots
  system(paste0('echo Creating summary plot'))
  plot <- plotGlms(roc)

  # create summary object
  summary <- list()
  summary$glms <- all_feature_glms
  summary$control_glms <- control_feature_glms
  summary$roc <- roc
  summary$plot <- plot
  summary$samples <- lapply(datasets, function(dataset) unname(dataset$participant_ids))
  summary
}

Cancer incidence

metadata <- luad_clinical
metadata$sample_type <- as.factor(metadata$sample_type)
sample_type <- buildTestCompareGlms(metadata = metadata, covariate = "sample_type", counts = luad_adjusted_counts, ntrials = ntrials)

sample_type$plot
plot of chunk sample-type-plot

plotAucs(sample_type$glms)
plot of chunk sample-type-auc-plot

Metastasis

metadata <- luad_clinical[luad_clinical$m_stage %in% c("M0", "M1", "M1a", "M1b") & luad_clinical$sample_type == "TP", ]
counts <- luad_adjusted_counts[, colnames(luad_adjusted_counts) %in% metadata$barcode]
metadata$metastasis <- as.factor(ifelse(metadata$m_stage == "M0", "No metastasis" , "Metastasis"))
metastasis <- buildTestCompareGlms(metadata = metadata, covariate = "metastasis", counts = counts, ntrials = ntrials)

metastasis$plot
plot of chunk metastasis-plot

plotAucs(metastasis$glms)
plot of chunk metastasis-auc-plot

Cancer stage

# stage I, IA vs stage IIIA, IIIB, IV - essentially spreading vs non-spreading
metadata <- luad_clinical[luad_clinical$stage %in% c("Stage I", "Stage IA", "Stage IIIA", "Stage IIIB", "Stage IV") & luad_clinical$sample_type == "TP", ]
counts <- luad_adjusted_counts[, colnames(luad_adjusted_counts) %in% metadata$barcode]
metadata$stage <- as.factor(ifelse(metadata$stage %in% c("Stage I", "Stage IA"), "Early stage" , "Late stage"))
stage_coarse <- buildTestCompareGlms(metadata = metadata, covariate = "stage", counts = counts, ntrials = ntrials)

stage_coarse$plot
plot of chunk stage-coarse-plot

plotAucs(stage_coarse$glms)
plot of chunk stage-coarse-auc-plot

Lymph node spread

metadata <- luad_clinical[luad_clinical$n_stage %in% c("N0", "N1", "N2", "N3") & luad_clinical$sample_type == "TP", ]
counts <- luad_adjusted_counts[, colnames(luad_adjusted_counts) %in% metadata$barcode]
metadata$n_stage <- as.factor(ifelse(metadata$n_stage %in% c("N0"), "N0" , "N1+"))
lymph_spread <- buildTestCompareGlms(metadata = metadata, covariate = "n_stage", counts = counts, ntrials = ntrials)

lymph_spread$plot
plot of chunk lymph-plot

plotAucs(lymph_spread$glms)
plot of chunk lmyph-auc-plot

tsrna_coefs <- c(extractCoefs(sample_type$glms[[1]], ntrials), extractCoefs(stage_coarse$glms[[1]], ntrials))
tsrna_coefs <- data.frame(table(names(tsrna_coefs)), stringsAsFactors = FALSE)
colnames(tsrna_coefs) <- c("Name", "Frequency")
tsrna_coefs <- tsrna_coefs[order(tsrna_coefs$Frequency, decreasing = TRUE), ]
ggplot(tsrna_coefs) + geom_histogram(aes(x = Frequency)) + xlab(paste("No. times tsRNA selected in", ntrials, "GLMs"))
plot of chunk extract-coefs
mirna_coefs <- c(extractCoefs(sample_type$glms[[2]], ntrials), extractCoefs(stage_coarse$glms[[2]], ntrials))
mirna_coefs <- data.frame(table(names(mirna_coefs)), stringsAsFactors = FALSE)
colnames(mirna_coefs) <- c("Name", "Frequency")
mirna_coefs <- mirna_coefs[order(mirna_coefs$Frequency, decreasing = TRUE), ]
ggplot(mirna_coefs) + geom_histogram(aes(x = Frequency)) + xlab(paste("No. times miRNA selected in", ntrials, "GLMs"))
plot of chunk extract-coefs
save(tsrna_coefs, mirna_coefs, file = "coefs.RData")

Feature selection and predicting survival time

Build model, get coefficients

metadata <- luad_clinical[luad_clinical$sample_type == "TP" & luad_clinical$days_survived > 0 & luad_clinical$days_survived < 3000 & !is.na(luad_clinical$days_survived), ]
counts <- luad_adjusted_counts[rownames(luad_adjusted_counts) %in% tsrna_coefs[tsrna_coefs$Frequency > 10, ]$Name, colnames(luad_adjusted_counts) %in% metadata$barcode]
metadata$dummy <- as.factor(1)
dataset <- setupTrainingTestingSets(metadata, "dummy", counts)
training_counts <- t(dataset$training_counts)
training_response <- Surv(dataset$training_metadata$days_survived, dataset$training_metadata$vital_status)
model <- cv.glmnet(training_counts, training_response, family = "cox")
plot(model)
plot of chunk glmnet-cox-1
coef(model, s = model$lambda.min)
## 196 x 1 sparse Matrix of class "dgCMatrix"
##                                                       1
## tRNA-Ala-AGC-1-1_threehalf                 0.0054334527
## tRNA-Ala-AGC-1-1_trailer                   .           
## tRNA-Ala-AGC-3-1_threehalf                 .           
## tRNA-Ala-AGC-5-1_threehalf                 .           
## tRNA-Ala-AGC-8-1_trailer                   .           
## tRNA-Ala-CGC-4-1_threehalf                 .           
## tRNA-Ala-TGC-1-1_threehalf                 .           
## tRNA-Ala-TGC-5-1_trailer                   .           
## tRNA-Ala-TGC-6-1_threehalf                 .           
## tRNA-Arg-ACG-1-1_trailer                   .           
## tRNA-Arg-ACG-1-2_trailer                   .           
## tRNA-Arg-ACG-1-3_trailer                   .           
## tRNA-Arg-ACG-2-1_trailer                   .           
## tRNA-Arg-ACG-2-3_trailer                   .           
## tRNA-Arg-ACG-2-4_trailer                   .           
## tRNA-Arg-CCT-3-1_trailer                   .           
## tRNA-Arg-CCT-5-1_fivehalf                  .           
## tRNA-Arg-CCT-5-1_threehalf                 .           
## tRNA-Arg-CCT-6-1_fivehalf                  .           
## tRNA-Arg-TCG-1-1_trailer                   .           
## tRNA-Arg-TCG-5-1_trailer                   .           
## tRNA-Arg-TCT-2-1_trailer                   .           
## tRNA-Arg-TCT-3-1_trailer                   .           
## tRNA-Arg-TCT-4-1_trailer                   .           
## tRNA-Arg-TCT-5-1_threehalf                 .           
## tRNA-Asn-GTT-2-3_trailer                   .           
## tRNA-Asn-GTT-2-4_trailer                   .           
## tRNA-Asn-GTT-4-1_trailer                   0.0620001203
## tRNA-Asn-GTT-5-1_trailer                   .           
## tRNA-Asn-GTT-7-1_fivehalf                  .           
## tRNA-Asn-GTT-8-1_fivehalf                  .           
## tRNA-Asn-GTT-8-1_threehalf                 .           
## tRNA-Asn-GTT-12-1_fivehalf                 .           
## tRNA-Asn-GTT-15-1_fivehalf                 .           
## tRNA-Asn-GTT-16-1_threehalf                .           
## tRNA-Asn-GTT-16-2_threehalf                .           
## tRNA-Asn-GTT-16-3_threehalf                .           
## tRNA-Asn-GTT-19-1_fivehalf                 .           
## tRNA-Asn-GTT-19-1_threehalf                .           
## tRNA-Asn-GTT-21-1_threehalf                .           
## tRNA-Asp-GTC-2-9_trailer                   .           
## tRNA-Asp-GTC-2-11_trailer                  .           
## tRNA-Asp-GTC-6-1_threehalf                 .           
## tRNA-Asp-GTC-10-1_fivehalf                 .           
## tRNA-Cys-ACA-1-1_threehalf                 .           
## tRNA-Cys-GCA-1-1_trailer                   .           
## tRNA-Cys-GCA-12-1_trailer                  .           
## tRNA-Cys-GCA-17-1_trailer                  .           
## tRNA-Gln-CTG-2-1_trailer                   .           
## tRNA-Gln-CTG-12-1_threehalf                .           
## tRNA-Gln-TTG-4-1_threehalf                 .           
## tRNA-Gln-TTG-6-1_fivehalf                  .           
## tRNA-Glu-CTC-1-1_fivehalf                  .           
## tRNA-Glu-CTC-1-7_trailer                   .           
## tRNA-Glu-CTC-3-1_fivehalf                  .           
## tRNA-Glu-CTC-5-1_threehalf                 .           
## tRNA-Glu-CTC-8-1_threehalf                 .           
## tRNA-Glu-CTC-13-1_threehalf                0.0055847728
## tRNA-Glu-TTC-2-1_fivehalf                  .           
## tRNA-Glu-TTC-2-2_fivehalf                  .           
## tRNA-Glu-TTC-4-2_trailer                   .           
## tRNA-Glu-TTC-5-1_fivehalf                  .           
## tRNA-Glu-TTC-9-1_threehalf                 .           
## tRNA-Glu-TTC-13-1_fivehalf                 .           
## tRNA-Glu-TTC-13-1_threehalf                .           
## tRNA-Gly-CCC-1-1_fivehalf                  .           
## tRNA-Gly-CCC-1-1_trailer                   .           
## tRNA-Gly-CCC-1-2_trailer                   .           
## tRNA-Gly-CCC-2-1_trailer                   .           
## tRNA-Gly-CCC-4-1_threehalf                 .           
## tRNA-Gly-CCC-4-1_trailer                   .           
## tRNA-Gly-CCC-7-1_trailer                   .           
## tRNA-Gly-GCC-1-1_trailer                   .           
## tRNA-Gly-GCC-1-2_trailer                   .           
## tRNA-Gly-GCC-2-2_trailer                   .           
## tRNA-Gly-GCC-5-1_fivehalf                  .           
## tRNA-Gly-GCC-5-1_threehalf                 .           
## tRNA-Gly-TCC-4-1_trailer                   .           
## tRNA-His-GTG-1-1_trailer                   .           
## tRNA-His-GTG-1-5_trailer                   .           
## tRNA-Ile-AAT-3-1_fivehalf                  .           
## tRNA-Ile-AAT-5-1_trailer                   .           
## tRNA-Ile-AAT-5-4_trailer                   .           
## tRNA-Ile-AAT-7-1_fivehalf                  .           
## tRNA-Ile-AAT-7-2_fivehalf                  .           
## tRNA-Ile-AAT-9-1_threehalf                 .           
## tRNA-Ile-GAT-1-1_fivehalf                  .           
## tRNA-Ile-GAT-1-2_fivehalf                  .           
## tRNA-Ile-GAT-1-3_fivehalf                  .           
## tRNA-Ile-TAT-2-3_threehalf                 0.0004314436
## tRNA-Leu-AAG-1-1_trailer                   .           
## tRNA-Leu-AAG-1-2_trailer                   .           
## tRNA-Leu-AAG-3-1_trailer                   0.0270084946
## tRNA-Leu-AAG-5-1_fivehalf                  .           
## tRNA-Leu-AAG-8-1_fivehalf                 -0.0023054532
## tRNA-Leu-AAG-8-1_threehalf                 .           
## tRNA-Leu-CAA-4-1_threehalf                 .           
## tRNA-Leu-CAG-1-7_trailer                   .           
## tRNA-Leu-TAA-3-1_trailer                   .           
## tRNA-Leu-TAA-4-1_trailer                   .           
## tRNA-Lys-CTT-1-1_trailer                   .           
## tRNA-Lys-CTT-3-1_trailer                   .           
## tRNA-Lys-CTT-5-1_fivehalf                  .           
## tRNA-Lys-CTT-5-1_threehalf                 .           
## tRNA-Lys-CTT-8-1_threehalf                 .           
## tRNA-Lys-CTT-13-1_threehalf                .           
## tRNA-Lys-TTT-3-4_trailer                   .           
## tRNA-Lys-TTT-8-1_fivehalf                  .           
## tRNA-Lys-TTT-13-1_threehalf                .           
## tRNA-Phe-GAA-1-3_trailer                   .           
## tRNA-Phe-GAA-1-4_trailer                   .           
## tRNA-Phe-GAA-1-6_trailer                   .           
## tRNA-Phe-GAA-2-1_trailer                   .           
## tRNA-Phe-GAA-3-1_trailer                   .           
## tRNA-Pro-AGG-1-1_trailer                   .           
## tRNA-Pro-AGG-2-8_trailer                   .           
## tRNA-Pro-CGG-2-1_trailer                   .           
## tRNA-SeC-TCA-1-1_threehalf                 .           
## tRNA-Ser-ACT-1-1_threehalf                 0.0316741929
## tRNA-Ser-ACT-1-1_trailer                   1.1866070827
## tRNA-Ser-AGA-3-1_threehalf                 .           
## tRNA-Ser-CGA-2-1_trailer                   .           
## tRNA-Ser-CGA-4-1_trailer                   .           
## tRNA-Ser-GCT-1-1_threehalf                 .           
## tRNA-Ser-GCT-2-1_threehalf                 .           
## tRNA-Ser-GCT-4-2_trailer                   .           
## tRNA-Ser-TGA-1-1_threehalf                 .           
## tRNA-Thr-AGT-1-1_threehalf                 .           
## tRNA-Thr-AGT-1-3_threehalf                 .           
## tRNA-Thr-AGT-3-1_trailer                   .           
## tRNA-Thr-AGT-7-1_fivehalf                  .           
## tRNA-Thr-CGT-5-1_fivehalf                  .           
## tRNA-Thr-CGT-5-1_trailer                   .           
## tRNA-Thr-TGT-5-1_trailer                   .           
## tRNA-Trp-CCA-5-1_fivehalf                  .           
## tRNA-Tyr-ATA-1-1_fivehalf                  .           
## tRNA-Tyr-GTA-1-1_trailer                   .           
## tRNA-Tyr-GTA-3-1_threehalf                 .           
## tRNA-Tyr-GTA-3-1_trailer                   .           
## tRNA-Tyr-GTA-8-1_fivehalf                  .           
## tRNA-Tyr-GTA-8-1_threehalf                 .           
## tRNA-Tyr-GTA-9-1_fivehalf                  .           
## tRNA-Und-NNN-6-1_fivehalf                  .           
## tRNA-Val-AAC-2-1_trailer                   .           
## tRNA-Val-AAC-3-1_trailer                   .           
## tRNA-Val-AAC-4-1_threehalf                 .           
## tRNA-Val-AAC-5-1_trailer                   .           
## tRNA-Val-AAC-6-1_trailer                   .           
## tRNA-Val-AAC-7-1_fivehalf                  .           
## tRNA-Val-CAC-2-1_trailer                   .           
## tRNA-Val-CAC-3-1_fivehalf                  .           
## tRNA-Val-CAC-8-1_threehalf                 .           
## tRNA-Val-TAC-1-1_trailer                   .           
## tRNA-Val-TAC-3-1_trailer                   .           
## tRNA-iMet-CAT-1-1_trailer                  .           
## nmt-tRNA-Gln-TTG-9-1_threehalf             .           
## chrM.tRNA14-ProTGG_trailer                 .           
## chrM.tRNA21-GlnTTG_trailer                 .           
## chrM.tRNA18-CysGCA_trailer                 .           
## chrM.tRNA16-SerTGA_threehalf               .           
## chrM.tRNA11-HisGTG_fivehalf                .           
## chrM.tRNA11-HisGTG_threehalf               .           
## chrM.tRNA1-PheGAA_threehalf                .           
## chrM.tRNA10-ArgTCG_trailer                 .           
## chrM.tRNA7-AspGTC_trailer                  .           
## chrM.tRNA2-ValTAC_trailer                  .           
## chrM.tRNA19-AsnGTT_fivehalf                .           
## chr6_cox_hap2.tRNA3-AlaAGC_threehalf       .           
## chr6_mcf_hap5.tRNA26-ValAAC_trailer        .           
## chr6_dbb_hap3.tRNA14-AlaAGC_trailer        .           
## chr6_ssto_hap7.tRNA35-PseudoTTT_threehalf  .           
## chr6_mcf_hap5.tRNA19-AlaAGC_trailer        .           
## chr6_cox_hap2.tRNA25-AlaTGC_trailer        .           
## chr6_apd_hap1.tRNA14-PheGAA_trailer        .           
## chr6_apd_hap1.tRNA12-AlaTGC_trailer        .           
## chr6_apd_hap1.tRNA23-ValAAC_trailer        .           
## chr6_mcf_hap5.tRNA17-PheGAA_trailer        .           
## chr6_ssto_hap7.tRNA32-AlaAGC_trailer       .           
## chr6_dbb_hap3.tRNA12-PheGAA_trailer        .           
## chr6_ssto_hap7.tRNA25-AlaAGC_trailer       .           
## chr6_mann_hap4.tRNA18-PheGAA_trailer       .           
## chr6_apd_hap1.tRNA16-AlaAGC_trailer        .           
## chr6_qbl_hap6.tRNA24-AlaTGC_threehalf      .           
## chr6_mcf_hap5.tRNA9-LeuAAG_trailer         .           
## chr6_cox_hap2.tRNA39-PseudoTTT_threehalf   .           
## chr6_qbl_hap6.tRNA18-AlaTGC_trailer        .           
## chr6_mcf_hap5.tRNA15-AlaTGC_trailer        .           
## chr6_cox_hap2.tRNA29-AlaAGC_trailer        .           
## chr6_mann_hap4.tRNA16-AlaTGC_trailer       .           
## chr6_cox_hap2.tRNA38-AlaCGC_threehalf      .           
## chr6_cox_hap2.tRNA27-PheGAA_trailer        .           
## chr6_ssto_hap7.tRNA31-ArgCCG_threehalf     .           
## chr6_dbb_hap3.tRNA10-AlaTGC_trailer        .           
## chr6_ssto_hap7.tRNA21-AlaTGC_trailer       .           
## chr6_qbl_hap6.tRNA20-PheGAA_trailer        .           
## chr6_ssto_hap7.tRNA23-PheGAA_fivehalf      .
plot(model$glmnet.fit, label = TRUE)
plot of chunk glmnet-cox-1

Naive Cox model prediction using summed hazard ratios

testing_counts <- t(dataset$testing_counts)
testing_response <- as.factor(ifelse(dataset$testing_metadata$days_survived >= 1095, ">3", "<3")) # test 1 year survival rate
pred <- predict(model, newx = testing_counts, s = model$lambda.min) 
prob <- prediction(-pred, testing_response) # flip signs because a positive hazard ratio gives a higher risk of death
perf <- performance(prob, "tpr", "fpr")
roc <- data.frame(TPR = unlist(perf@y.values), FPR = unlist(perf@x.values))
auc <- unlist(performance(prob, "auc")@y.values)
auc <- round(auc, 3)
ggplot(roc) + geom_line(aes(x = FPR, y = TPR)) + geom_abline(intercept = 0, slope = 1, colour = "gray") + ylab("TPR") + xlab("FPR")
plot of chunk glmnet-cox-2

Classify into high/low risk groups

metadata <- dataset$testing_metadata
metadata$risk <- as.factor(ifelse(pred > median(pred), "High-risk", "Low-risk"))
km <- survfit(Surv(days_survived, vital_status) ~ risk, data = metadata)
ggsurv(km) + theme_bw() + ggtitle("Kaplan-Meier plot")
plot of chunk glmnet-cox-3
cox <- coxph(Surv(days_survived, vital_status) ~ risk, data = metadata)
summary(cox)
## Call:
## coxph(formula = Surv(days_survived, vital_status) ~ risk, data = metadata)
## 
##   n= 232, number of events= 59 
## 
##                 coef exp(coef) se(coef)     z Pr(>|z|)   
## riskLow-risk -0.7619    0.4668   0.2711 -2.81  0.00495 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## riskLow-risk    0.4668      2.142    0.2744    0.7941
## 
## Concordance= 0.631  (se = 0.041 )
## Rsquare= 0.034   (max possible= 0.864 )
## Likelihood ratio test= 8.13  on 1 df,   p=0.004352
## Wald test            = 7.9  on 1 df,   p=0.004949
## Score (logrank) test = 8.27  on 1 df,   p=0.00404

Build new logistic regression model using hazard ratios

save.session("predict.RSession")
## Saving search path..
## Saving list of loaded packages..
## Saving all data...
## Done.